#!/usr/bin/env Rscript
# plot_expression.R
# 读取 CSV 格式的基因表达数据，并绘制各样本表达值的分布图
# 用法: Rscript plot_expression.R expression.csv output_plot.pdf
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(reshape2))

# Command-line entry point: read an expression matrix from a CSV file and
# save a faceted histogram (one panel per sample column) to the output file.
#
# Positional arguments:
#   1. path to the input CSV; its first column is used as row names
#   2. path of the plot file to write (format chosen from the extension)
args <- commandArgs(trailingOnly = TRUE)
if (length(args) != 2) {
  stop(
    "用法: Rscript plot_expression.R expression.csv output_plot.pdf",
    call. = FALSE
  )
}
input_path <- args[1]
output_path <- args[2]

# Fail early with a readable message instead of read.csv's connection error.
if (!file.exists(input_path)) {
  stop("找不到输入文件: ", input_path, call. = FALSE)
}

# check.names = FALSE keeps the sample names exactly as written in the CSV
# header; the default would rewrite names such as "Sample-1" to "Sample.1".
data <- read.csv(input_path, row.names = 1, check.names = FALSE)

# Only numeric columns can be binned. Selecting them explicitly replaces
# melt()'s implicit id/measure guessing (and the message it prints).
is_numeric_col <- vapply(data, is.numeric, logical(1))
if (!any(is_numeric_col)) {
  stop("输入文件中没有数值型的样本列: ", input_path, call. = FALSE)
}
expression <- data[, is_numeric_col, drop = FALSE]

# Reshape to long format: one row per (sample, expression value) pair.
data_long <- melt(
  expression,
  measure.vars = names(expression),
  variable.name = "Sample",
  value.name = "Expression"
)

# NOTE(review): binwidth = 1 is kept from the original; it presumably suits
# log-scale expression values -- confirm for raw counts.
p <- ggplot(data_long, aes(x = Expression)) +
  geom_histogram(binwidth = 1, fill = "steelblue", color = "black") +
  facet_wrap(~Sample, scales = "free") +
  theme_minimal() +
  labs(title = "各样本基因表达分布", x = "表达值", y = "频数")

# The default pdf() device cannot render the CJK labels used above, so use
# the cairo-based PDF device when writing a PDF and cairo is available.
# Other formats, and builds without cairo, use ggsave's default device.
is_pdf <- identical(tolower(tools::file_ext(output_path)), "pdf")
if (is_pdf && capabilities("cairo")[[1]]) {
  ggsave(output_path, plot = p, device = grDevices::cairo_pdf)
} else {
  ggsave(output_path, plot = p)
}